{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Comparison between plugin and python results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "from mmsplice.vcf_dataloader import SplicingVCFDataloader\n",
    "from mmsplice import MMSplice, predict_all_table\n",
    "\n",
    "from mmsplice.utils import max_varEff"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## mmsplice perl plugin results"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Run the MMSplice VEP plugin to obtain the `variant_effect_output.txt` file; see VEP_plugin/README.md for more details."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Parse the `variant_effect_output.txt` file into a DataFrame."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from mmsplice.utils import read_vep\n",
    "\n",
    "# Parse the VEP plugin output file into a DataFrame.\n",
    "df_plugin = read_vep('../variant_effect_output.txt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reduce the plugin table with max_varEff and index the result by variant ID.\n",
    "# NOTE(review): presumably keeps the maximum-effect row per variant - confirm in mmsplice.utils.max_varEff.\n",
    "df_plugin_predictionsMax = max_varEff(df_plugin).set_index('ID')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(17513, 14)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (n_rows, n_columns) of the parsed plugin output.\n",
    "df_plugin.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>mmsplice_dlogitPsi</th>\n",
       "      <th>mmsplice_alt_acceptor</th>\n",
       "      <th>mmsplice_alt_acceptorIntron</th>\n",
       "      <th>mmsplice_alt_donor</th>\n",
       "      <th>mmsplice_alt_donorIntron</th>\n",
       "      <th>mmsplice_alt_exon</th>\n",
       "      <th>mmsplice_delta_logit_psi</th>\n",
       "      <th>mmsplice_pathogenicity</th>\n",
       "      <th>mmsplice_ref_acceptor</th>\n",
       "      <th>mmsplice_ref_acceptorIntron</th>\n",
       "      <th>mmsplice_ref_donor</th>\n",
       "      <th>mmsplice_ref_donorIntron</th>\n",
       "      <th>mmsplice_ref_exon</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ID</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>17:41197805:ACATCTGCC:['A']</th>\n",
       "      <td>0.048545</td>\n",
       "      <td>5.068929</td>\n",
       "      <td>-2.924059</td>\n",
       "      <td>-4.232478</td>\n",
       "      <td>-0.068444</td>\n",
       "      <td>-3.663338</td>\n",
       "      <td>0.048545</td>\n",
       "      <td>0.942513</td>\n",
       "      <td>5.068929</td>\n",
       "      <td>-2.924059</td>\n",
       "      <td>-4.232478</td>\n",
       "      <td>-0.068444</td>\n",
       "      <td>-3.694287</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17:41197809:CTGCCCAAT:['C']</th>\n",
       "      <td>-0.208980</td>\n",
       "      <td>4.766502</td>\n",
       "      <td>-2.924059</td>\n",
       "      <td>-3.931190</td>\n",
       "      <td>0.482525</td>\n",
       "      <td>-4.180473</td>\n",
       "      <td>-0.208980</td>\n",
       "      <td>0.982340</td>\n",
       "      <td>5.068929</td>\n",
       "      <td>-2.924059</td>\n",
       "      <td>-3.931190</td>\n",
       "      <td>0.482525</td>\n",
       "      <td>-4.191132</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17:41197810:T:['A']</th>\n",
       "      <td>0.098853</td>\n",
       "      <td>5.068929</td>\n",
       "      <td>-2.924059</td>\n",
       "      <td>-2.570118</td>\n",
       "      <td>-0.261311</td>\n",
       "      <td>-3.897650</td>\n",
       "      <td>0.098853</td>\n",
       "      <td>0.872820</td>\n",
       "      <td>5.068929</td>\n",
       "      <td>-2.924059</td>\n",
       "      <td>-2.570118</td>\n",
       "      <td>-0.261311</td>\n",
       "      <td>-3.961106</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17:41197811:G:['T']</th>\n",
       "      <td>0.325282</td>\n",
       "      <td>5.068929</td>\n",
       "      <td>-2.924059</td>\n",
       "      <td>-2.570118</td>\n",
       "      <td>-0.261311</td>\n",
       "      <td>-3.751340</td>\n",
       "      <td>0.325282</td>\n",
       "      <td>0.877968</td>\n",
       "      <td>5.068929</td>\n",
       "      <td>-2.924059</td>\n",
       "      <td>-2.570118</td>\n",
       "      <td>-0.261311</td>\n",
       "      <td>-3.961106</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17:41197812:C:['A']</th>\n",
       "      <td>0.050051</td>\n",
       "      <td>5.068929</td>\n",
       "      <td>-2.924059</td>\n",
       "      <td>-2.570118</td>\n",
       "      <td>-0.261311</td>\n",
       "      <td>-3.929184</td>\n",
       "      <td>0.050051</td>\n",
       "      <td>0.871686</td>\n",
       "      <td>5.068929</td>\n",
       "      <td>-2.924059</td>\n",
       "      <td>-2.570118</td>\n",
       "      <td>-0.261311</td>\n",
       "      <td>-3.961106</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                             mmsplice_dlogitPsi  mmsplice_alt_acceptor  \\\n",
       "ID                                                                       \n",
       "17:41197805:ACATCTGCC:['A']            0.048545               5.068929   \n",
       "17:41197809:CTGCCCAAT:['C']           -0.208980               4.766502   \n",
       "17:41197810:T:['A']                    0.098853               5.068929   \n",
       "17:41197811:G:['T']                    0.325282               5.068929   \n",
       "17:41197812:C:['A']                    0.050051               5.068929   \n",
       "\n",
       "                             mmsplice_alt_acceptorIntron  mmsplice_alt_donor  \\\n",
       "ID                                                                             \n",
       "17:41197805:ACATCTGCC:['A']                    -2.924059           -4.232478   \n",
       "17:41197809:CTGCCCAAT:['C']                    -2.924059           -3.931190   \n",
       "17:41197810:T:['A']                            -2.924059           -2.570118   \n",
       "17:41197811:G:['T']                            -2.924059           -2.570118   \n",
       "17:41197812:C:['A']                            -2.924059           -2.570118   \n",
       "\n",
       "                             mmsplice_alt_donorIntron  mmsplice_alt_exon  \\\n",
       "ID                                                                         \n",
       "17:41197805:ACATCTGCC:['A']                 -0.068444          -3.663338   \n",
       "17:41197809:CTGCCCAAT:['C']                  0.482525          -4.180473   \n",
       "17:41197810:T:['A']                         -0.261311          -3.897650   \n",
       "17:41197811:G:['T']                         -0.261311          -3.751340   \n",
       "17:41197812:C:['A']                         -0.261311          -3.929184   \n",
       "\n",
       "                             mmsplice_delta_logit_psi  mmsplice_pathogenicity  \\\n",
       "ID                                                                              \n",
       "17:41197805:ACATCTGCC:['A']                  0.048545                0.942513   \n",
       "17:41197809:CTGCCCAAT:['C']                 -0.208980                0.982340   \n",
       "17:41197810:T:['A']                          0.098853                0.872820   \n",
       "17:41197811:G:['T']                          0.325282                0.877968   \n",
       "17:41197812:C:['A']                          0.050051                0.871686   \n",
       "\n",
       "                             mmsplice_ref_acceptor  \\\n",
       "ID                                                   \n",
       "17:41197805:ACATCTGCC:['A']               5.068929   \n",
       "17:41197809:CTGCCCAAT:['C']               5.068929   \n",
       "17:41197810:T:['A']                       5.068929   \n",
       "17:41197811:G:['T']                       5.068929   \n",
       "17:41197812:C:['A']                       5.068929   \n",
       "\n",
       "                             mmsplice_ref_acceptorIntron  mmsplice_ref_donor  \\\n",
       "ID                                                                             \n",
       "17:41197805:ACATCTGCC:['A']                    -2.924059           -4.232478   \n",
       "17:41197809:CTGCCCAAT:['C']                    -2.924059           -3.931190   \n",
       "17:41197810:T:['A']                            -2.924059           -2.570118   \n",
       "17:41197811:G:['T']                            -2.924059           -2.570118   \n",
       "17:41197812:C:['A']                            -2.924059           -2.570118   \n",
       "\n",
       "                             mmsplice_ref_donorIntron  mmsplice_ref_exon  \n",
       "ID                                                                        \n",
       "17:41197805:ACATCTGCC:['A']                 -0.068444          -3.694287  \n",
       "17:41197809:CTGCCCAAT:['C']                  0.482525          -4.191132  \n",
       "17:41197810:T:['A']                         -0.261311          -3.961106  \n",
       "17:41197811:G:['T']                         -0.261311          -3.961106  \n",
       "17:41197812:C:['A']                         -0.261311          -3.961106  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first rows of the ID-indexed plugin table.\n",
    "df_plugin_predictionsMax.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## mmsplice python api results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Test inputs for the Python API run (relative paths under ../tests/data).\n",
    "gtf = '../tests/data/test.gtf'  # NOTE(review): not referenced by any later cell in this notebook\n",
    "vcf = '../tests/data/test.vcf.gz'\n",
    "fasta = '../tests/data/hg19.nochr.chr17.fa'\n",
    "gtfIntervalTree = '../tests/data/test.pkl' # pickled exon interval tree"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the dataloader from the exon interval-tree pickle, the reference FASTA and the VCF.\n",
    "# NOTE(review): out_file is the same path as the pickle passed as the first argument - confirm this is intended.\n",
    "dl = SplicingVCFDataloader(gtfIntervalTree, \n",
    "                          fasta,\n",
    "                          vcf,\n",
    "                          out_file=gtfIntervalTree,\n",
    "                          split_seq=False, overhang=(100,100))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/muhammedhasan/Projects/MMSplice/venv/lib/python3.5/site-packages/keras/engine/saving.py:292: UserWarning: No training configuration found in save file: the model was *not* compiled. Compile it manually.\n",
      "  warnings.warn('No training configuration found in save file: '\n"
     ]
    }
   ],
   "source": [
    "# Instantiate the MMSplice model with explicit exon/intron cut and window-length settings.\n",
    "# NOTE(review): presumably chosen to match the VEP plugin's configuration - confirm.\n",
    "model = MMSplice(\n",
    "    exon_cut_l=0,\n",
    "    exon_cut_r=0,\n",
    "    acceptor_intron_cut=6,\n",
    "    donor_intron_cut=6,\n",
    "    acceptor_intron_len=50,\n",
    "    acceptor_exon_len=3,\n",
    "    donor_exon_len=5,\n",
    "    donor_intron_len=13)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "0it [00:00, ?it/s]/home/muhammedhasan/Projects/MMSplice/mmsplice/mmsplice.py:162: UserWarning: None GT donor\n",
      "  warnings.warn(\"None GT donor\", UserWarning)\n",
      "/home/muhammedhasan/Projects/MMSplice/mmsplice/mmsplice.py:164: UserWarning: None AG donor\n",
      "  warnings.warn(\"None AG donor\", UserWarning)\n",
      "3it [00:20,  7.34s/it]\n"
     ]
    }
   ],
   "source": [
    "# Run the Python API over the dataloader and collect the predictions in one table.\n",
    "df_python = predict_all_table(\n",
    "    model,\n",
    "    dl,\n",
    "    batch_size=1024,\n",
    "    split_seq=False,\n",
    "    assembly=True,\n",
    "    pathogenicity=True,\n",
    "    splicing_efficiency=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>exons</th>\n",
       "      <th>mmsplice_pathogenicity</th>\n",
       "      <th>mmsplice_dse</th>\n",
       "      <th>mmsplice_dlogitPsi</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>17:41197805:ACATCTGCC:['A']</td>\n",
       "      <td>17_41197695_41197819:-</td>\n",
       "      <td>0.871460</td>\n",
       "      <td>-0.024030</td>\n",
       "      <td>0.040370</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>17:41197805:ACATCTGCC:['A']</td>\n",
       "      <td>17_41196312_41197819:-</td>\n",
       "      <td>0.908687</td>\n",
       "      <td>-0.119514</td>\n",
       "      <td>0.001848</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>17:41197805:ACATCTGCC:['A']</td>\n",
       "      <td>17_41196313_41197819:-</td>\n",
       "      <td>0.949493</td>\n",
       "      <td>-0.119505</td>\n",
       "      <td>0.001852</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>17:41197805:ACATCTGCC:['A']</td>\n",
       "      <td>17_41196822_41197819:-</td>\n",
       "      <td>0.853564</td>\n",
       "      <td>-0.108007</td>\n",
       "      <td>0.006490</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>17:41197805:ACATCTGCC:['A']</td>\n",
       "      <td>17_41197580_41197819:-</td>\n",
       "      <td>0.941126</td>\n",
       "      <td>-0.012832</td>\n",
       "      <td>0.044888</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                            ID                   exons  \\\n",
       "0  17:41197805:ACATCTGCC:['A']  17_41197695_41197819:-   \n",
       "1  17:41197805:ACATCTGCC:['A']  17_41196312_41197819:-   \n",
       "2  17:41197805:ACATCTGCC:['A']  17_41196313_41197819:-   \n",
       "3  17:41197805:ACATCTGCC:['A']  17_41196822_41197819:-   \n",
       "4  17:41197805:ACATCTGCC:['A']  17_41197580_41197819:-   \n",
       "\n",
       "   mmsplice_pathogenicity  mmsplice_dse  mmsplice_dlogitPsi  \n",
       "0                0.871460     -0.024030            0.040370  \n",
       "1                0.908687     -0.119514            0.001848  \n",
       "2                0.949493     -0.119505            0.001852  \n",
       "3                0.853564     -0.108007            0.006490  \n",
       "4                0.941126     -0.012832            0.044888  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the raw Python predictions; in the saved output one variant ID spans several rows with different `exons` values.\n",
    "df_python.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2164, 5)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (n_rows, n_columns) of the Python prediction table.\n",
    "df_python.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reduce the Python table with max_varEff and index the result by variant ID, mirroring the plugin table.\n",
    "# NOTE(review): presumably keeps the maximum-effect row per variant - confirm in mmsplice.utils.max_varEff.\n",
    "df_python_predictionsMax = max_varEff(df_python).set_index('ID')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Comparison"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Variant IDs present in both result tables.\n",
    "# Sorted so the row order of every later `.loc[indexes, ...]` is the same on each run;\n",
    "# a bare set of strings iterates in an order that changes between kernel restarts.\n",
    "indexes = sorted(set(df_plugin_predictionsMax.index) & set(df_python_predictionsMax.index))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUUAAAEyCAYAAABtU8IkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAE+FJREFUeJzt3W+MXNV9xvHn8drQFQ0xLhuCN1AT13UFMrHTFX9EGpEWsKFqMVRpoaRCahQnUnjTVJawsGqSBjmqm6RvkrS2ikANgaYJbGhA3gBSioSAZOk6rN1oiw0EPBC8CbEShS1s7F9f7KxzvMzszuzcO3PvzPcjjXbmzvWcn0bjR/fcc+49jggBAGYs6XQBAFAkhCIAJAhFAEgQigCQIBQBIEEoAkCCUASABKEIAAlCEQASSztdQOrMM8+MVatWdboMAF3mmWee+UlEDDSyb6FCcdWqVRodHe10GQC6jO0fNbov3WcASBCKAJAgFAEgQSgCQIJQBIAEoQgACUIRABKFmqcIAPMZHqto18iEXjk6pZXL+7V141pt3jCYaRuEIoBSGB6raNv945qaPiZJqhyd0rb7xyUp02Ck+wygFHaNTJwIxFlT08e0a2Qi03YIRQCl8MrRqaa2LxahCKAUVi7vb2r7YhGKAEph68a16l/Wd9K2/mV92rpxbabtMNACoBRmB1MYfQaAqs0bBjMPwbnoPgNAglAEgAShCAAJQhEAEoQiACQIRQBIEIoAkMgkFG3fafuI7f3JttttV2zvqz6uyaItAMhTVkeKd0naVGP7FyNiffXxcEZtAUBuMgnFiHhc0utZfBYAdFLe5xRvsf1stXt9Rq0dbG+xPWp7dHJyMudyAGB+eYbiVyStlrRe0quSPl9rp4jYHRFDETE0MDCQYzkAsLDcQjEiXouIYxFxXNIeSRfl1RYAZCW3ULR9dvLyOkn76+0LAEWRya3DbN8r6XJJZ9o+LGmHpMttr5cUkl6U9PEs2gKAPGUSihFxY43N/5rFZwNAO3FFCwAkCEUASBCKAJAgFAEgQSgCQIJQBIAEoQgACUIRABKEIgAkCEUASBCKAJAgFAEgQSgCQIJQBIAEoQgACUIRABKEIgAkCEUASBCKAJAgFAEgQSgCQIJQBIAEoQgACUIRABKEIgAkCEUASBCKAJAgFAEgQSgCQIJQBIAEoQgAiUxC0fadto/Y3p9sW2H7EdvPVf+ekUVbAJCnrI4U75K0ac62WyU9FhFrJD1WfQ0AhZZJKEbE45Jen7P5Wkl3V5/fLWlzFm0BQJ7yPKd4VkS8Wn3+Y0ln1drJ9hbbo7ZHJycncywHABbWloGWiAhJUee93RExFBFDAwMD7SgHAOrKMxRfs322JFX/HsmxLQDIRJ6h+KCkm6vPb5b0rRzbAoBMZDUl515JT0paa/uw7Y9K+pykK20/J+mK6msAKLSlWXxIRNxY560/yuLzAaBduKIFABKEIgAkCEUASBCKAJAgFAEgQSgCQIJQBIAEoQgACUIRABKEIgAkCEUASBCKAJAgFAEgQSgCQIJQBIAEoQgACUIRABKEIgAkCEUASBCKAJAgFAEgQSgCQIJQBIAEoQgACUIRABKEIgAkCEUASBCKAJAgFAEgsbTTBQBlMjxW0a6RCb1ydEorl/dr68a12rxhsNNlIUO5h6LtFyX9QtIxSb+KiKG82wTyMDxW0bb7xzU1fUySVDk6pW33j0sSwdhF2tV9/lBErCcQUWa7RiZOBOKsqelj2jUy0aGKkAfOKQINeuXoVFPbUU7tCMWQ9B3bz9je0ob2gFysXN7f1HaUUztC8QMR8X5JV0v6pO0Ppm/a3mJ71Pbo5ORkG8oBFmfrxrXqX9Z30rb+ZX3aunFthypCHnIPxYioVP8ekfSApIvmvL87IoYiYmhgYCDvcoBF27xhUDuvX6fB5f2ypMHl/dp5/ToGWbpMrqPPtk+TtCQi
flF9fpWkz+TZJpCnzRsGCcEul/eUnLMkPWB7tq2vRcTenNsEWsJcxN6WayhGxPOS3pdnG0CWmIsIpuQACeYiglAEEsxFBNc+o6fNPX/4zv5lOjo1/bb9mIvYOwhF9Kxa5w+X9VnLlljTx+PEfsxF7C10n9Gzap0/nD4W+s3fWMpcxB7GkSJ6Vr3zhEffmNbY313V5mpQFBwpomdxLTNq4UgRPWP78LjuffplHYtQn61L3nuGXv/lWyd1oTl/CEIRPeGmPU/qiUOvn3h9LEJPHHpdl61eoRd/OsXVKziBUETX2z48flIgpp56/mc6tPOaNleEIuOcIrra8FhF9zz1Ut33j0XUfQ+9iVBEV9s1MqH5Yq9v5mYlwAl0n9GVZq9UqSxwed6NF5/TpopQFoQius7cK1XquWz1Cn1287o2VYWyIBTRdWpdqZKypJsuOZdARE2EIrrOfHe0GWTaDRZAKKIrzJ2HWMvg8n49cesftqkilBWhiFIbHqto63/s0/Tx+ffjShU0ilBEaW0fHtc9T70075QbS1ypgqYQiiil4bGKvjrPpOxZL3zuj9tQDboJoYjS2T483lAgAovBFS0olWYC8bLVK3KuBt2IUERpNBuI93zs0pwrQjei+4xSaGTKjSSddkqfDnxmUxsqQrciFFF4V37hu3ruyC8b2veO67hKBa0hFFFoF+7Yq5+/Of81zLPWvOs0pt2gZYQiCut3tj2kXzV4u8PTT+3TI5+6PNd60BsIRRTSqlsfanjfs95xip6+7cocq0EvYfQZhdNMIF62egWBiEwRiiiUZgORaTfIGqGIwmgmECURiMhF7qFoe5PtCdsHbd+ad3sop2YD8UWuaUZOcg1F232SviTpaknnS7rR9vl5tonyIRBRJHkfKV4k6WBEPB8Rb0m6T9K1ObeJEiEQUTR5h+KgpJeT14er206wvcX2qO3RycnJnMtBkRCIKKKOD7RExO6IGIqIoYGBgU6XgzYhEFFUeYdiRVK6sO57qtvQwwhEFFneofh9SWtsn2f7FEk3SHow5zZRYAQiii7Xy/wi4le2b5E0IqlP0p0RcSDPNlFcBCLKIPdrnyPiYUkP590Oio1ARFl0fKAF3Y9ARJkQisgVgYiyIRSRGwIRZUQoIhfNBOJSE4goDkIRmWv2CPHgTgIRxUEoIlN0mVF2hCIy00wgWgQiiolQRCaaPYf4AoGIgiIU0bJmlxDgHCKKjFDEog2PVZoKxI9cci5LCKDwWOIUi7J9eFxffeqlhvf/p79Yz0L1KAWOFNE0AhHdjFBEUwhEdDtCEQ0jENELCEU0hEBEr2CgBQu68gvf1XNHftnw/kzKRplxpIh5EYjoNYQi6rppz5MEInoOoYiatg+P64lDrze8P4GIbsE5RbwNXWb0MkIRJznv1ocUTexPIKLb0H3GCQQiQCii6sIdewlEQIQiJF18xyP6+ZvHGt6fQEQ3IxR73MV3PKLXfvFWw/sTiOh2DLT0sN+77WH937HGO80EInoBodijWGAKqI3ucw9igSmgPkKxx1y4Y29T+7PAFHoNodhDbtrzJKPMwAI4p9gjmrl0z+IIEb0rtyNF27fbrtjeV31ck1dbmN+FO/YSiECD8j5S/GJE/GPObWAezUzMXmqxJjN6Ht3nLtbMPEQCEZiR90DLLbaftX2n7TNq7WB7i+1R26OTk5M5l9M7Ltyxt+FAPP3UPgIRqHJEM7cBmPOP7UclvbvGW7dJekrSTySFpL+XdHZE/PV8nzc0NBSjo6OLrgczmrl07/RT+/TspzflXBHQWbafiYihRvZtqfscEVc0WNAeSd9upS005sIdexs+h3jWO07R07ddmXNFQLnkdk7R9tkR8Wr15XWS9ufVFmY0Mw+RJUiB2vIcaPkH2+s1031+UdLHc2yr592058mG11T5yCXnEohAHbmFYkT8VV6fjZM1E4hr3nWaPrt5Xc4VAeXFlJySa+ZKlY9cci6BCCyAa59LrJl1mQlEoDGE
Ykk1sy4zgQg0ju5zCTUzD5FRZqA5HCmWzE17nmwoEJctIRCBxeBIsWQa6TKveddpeuRTl+dfDNCFCMWS2D48rnuffnnB/S5bvUL3fOzSNlQEdCdCMWPDYxXtGpnQK0entHJ5v7ZuXNtyF7bReYinndJHIAItIhQzNDxW0bb7xzU1PXOpXeXolLbdPy5JiwrG4bGKPv2fB/SzN6Yb2v+O6xhhBlrFQEuGdo1MnAjEWVPTx7RrZKLpz5oN2EYC8dSlSxhUATLCkWKGXjk61dT2+Wy7/1lNTR+fd58+W4d2XnOiy/43/74vsy470Ks4UszQyuX9TW2vZ/vw+IKBKEk3XnzOiSPKytEphX7dZR8eqzTVJoAZhGKGtm5cq/5lfSdt61/Wp60b1zb074fHKtrwme/oq0+9tOC+l61eoc9uXpdplx0A3edMzXZZFzP6PDxW0dZv/EDTCywhsLx/mW7/0wtOfGaWXXYAhGLmNm8YXNT5vF0jEwsG4hJL+3ZcddK2lcv7VakRgM122QHMoPtcEI0c2f3lxee+bVurXXYAJ+NIsYPSid5LbB2bZxGxene6aaXLDuDtCMUOmTvRu14gLlti7frw++YNucV22QG8Hd3nDqk1aizNnDectbx/2YKBCCBbHCl2SL1ziBHSi59jYXqgUwjFNmrkHCKjxkBnEYpt0sg5REaNgc4jFNtgeKyiv/36D2oGYZ+t4xGMGgMFQSjmbPYIsd7o8vEIvcA5RKAwGH3OWb1R5lmcQwSKhVDM2XxXqnAOESgeQjFn9Y4E+2ztvH4d5xCBgiEUc1bv2uTP/zmTsoEiYqAlZ1ybDJQLoVhDOsn6nf3LZEtH35hedKBxbTJQHi11n21/2PYB28dtD815b5vtg7YnbG9srcz2mXt7/6NT0/rZG9Pc6h/oEa2eU9wv6XpJj6cbbZ8v6QZJF0jaJOnLtvve/s+LZ6EpNNzqH+huLYViRPwwImolxLWS7ouINyPiBUkHJV3USlvt0sjNXrnVP9C98hp9HpT0cvL6cHVb4TUymZoJ10D3WjAUbT9qe3+Nx7VZFGB7i+1R26OTk5NZfGRLak2hSTHhGuhuC44+R8QVi/jciqRzktfvqW6r9fm7Je2WpKGhoflXbmqDuVNoshh9BlAeeU3JeVDS12x/QdJKSWskfS+ntjLHFBqgd7U6Jec624clXSrpIdsjkhQRByR9XdL/SNor6ZMRUX9IFwAKoqUjxYh4QNIDdd67Q9IdrXw+ALQb1z4DQIJQBIAEoQgACUIRABKEIgAkCEUASBCKAJAgFAEgQSgCQIJQBIAEoQgACUIRABI9t5pfulIf90cEMFdPheLsSn2zC1PNrs4niWAEIKnHus+1VupjdT4AqZ4KxXqr8LE6H4BZPRWK9VbhY3U+ALN6KhRrrdTH6nwAUj010DJ3pT5GnwHM1VOhKLFSH4D59VT3GQAWQigCQIJQBIAEoQgACUIRABKlHX3mxg4A8lDKUOTGDgDyUsruMzd2AJCXUoYiN3YAkJdShiI3dgCQl1KGIjd2AJCXlkLR9odtH7B93PZQsn2V7Snb+6qPf2691F/bvGFQO69fp8Hl/bKkweX92nn9OgZZALSs1dHn/ZKul/QvNd47FBHrW/z8urixA4A8tBSKEfFDSbKdTTUA0GF5nlM8z/aY7f+y/Qf1drK9xfao7dHJyckcywGAhS14pGj7UUnvrvHWbRHxrTr/7FVJ50bET23/vqRh2xdExM/n7hgRuyXtlqShoaFovHQAyN6CoRgRVzT7oRHxpqQ3q8+fsX1I0u9KGm26QgBoo1y6z7YHbPdVn79X0hpJz+fRFgBkqdUpOdfZPizpUkkP2R6pvvVBSc/a3ifpG5I+ERGvt1YqAOSv1dHnByQ9UGP7NyV9s5XPBoBOKOUVLQCQF0cUZ8DX9qSkH9V5+0xJP2ljOYtR9BqLXp9EjVkpeo3tru+3I2KgkR0LFYrzsT0aEUML79k5Ra+x6PVJ1JiV
otdY5ProPgNAglAEgESZQnF3pwtoQNFrLHp9EjVmpeg1Fra+0pxTBIB2KNORIgDkjlAEgEShQ7FTd/bOosbqe9tsH7Q9YXtjp2pM2b7ddiX57q7pdE2zbG+qflcHbd/a6Xrmsv2i7fHq91aIm5vYvtP2Edv7k20rbD9i+7nq3zMKWGNhf4eFDkX9+s7ej9d471BErK8+PtHmulI1a7R9vqQbJF0gaZOkL8/eJKMAvph8dw93uhhJqn43X5J0taTzJd1Y/Q6L5kPV760oc+zu0szvK3WrpMciYo2kx6qvO+kuvb1GqYC/Q6ngoRgRP4yIQi/mPE+N10q6LyLejIgXJB2UdFF7qyuViyQdjIjnI+ItSfdp5jvEPCLicUlzb7ZyraS7q8/vlrS5rUXNUafGwip0KC6goTt7d9CgpJeT14er24rgFtvPVrs1He1aJYr8fc0KSd+x/YztLZ0uZh5nRcSr1ec/lnRWJ4uZRxF/h50PRduP2t5f4zHfUcLsnb03SPqUpK/ZPr1gNXbMAvV+RdJqSes18z1+vqPFlssHIuL9munif9L2Bztd0EJiZs5dEefdFfZ32Opqfi0rw529F1OjpIqkc5LX76luy12j9dreI+nbOZfTqI59X42KiEr17xHbD2imy1/rfHenvWb77Ih41fbZko50uqC5IuK12ecF+x12/khxMUpyZ+8HJd1g+1Tb52mmxu91uCZV/5PMuk4zA0VF8H1Ja2yfZ/sUzQxSPdjhmk6wfZrtd8w+l3SVivPdzfWgpJurz2+WVG8tpY4p8O9QiojCPjTzZR3WzFHha5JGqtv/TNIBSfsk/bekPylajdX3bpN0SNKEpKs7/X1Wa/o3SeOSntXMf56zO11TUts1kv63+p3d1ul65tT2Xkk/qD4OFKU+Sfdqpvs5Xf0dflTSb2lm1Pk5SY9KWlHAGgv7O+QyPwBIlLL7DAB5IRQBIEEoAkCCUASABKEIAAlCEQAShCIAJP4ftBIvsST0sHEAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 360x360 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Scatter the plugin score against the Python score for every shared variant ID;\n",
    "# points on the diagonal mean both implementations give the same value.\n",
    "fig, ax = plt.subplots(figsize=(5, 5))\n",
    "ax.scatter(\n",
    "    df_plugin_predictionsMax.loc[indexes, 'mmsplice_dlogitPsi'],\n",
    "    df_python_predictionsMax.loc[indexes, 'mmsplice_dlogitPsi'])\n",
    "# Label the axes so the figure can be read without looking at the code.\n",
    "ax.set_xlabel('VEP plugin mmsplice_dlogitPsi')\n",
    "ax.set_ylabel('Python API mmsplice_dlogitPsi')\n",
    "ax.set_title('Plugin vs. Python predictions')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9985859147942231"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from scipy.stats import pearsonr\n",
    "\n",
    "# Pearson correlation between plugin and Python scores over the shared variant IDs;\n",
    "# element [0] of the result is the correlation coefficient.\n",
    "plugin_scores = df_plugin_predictionsMax.loc[indexes, 'mmsplice_dlogitPsi']\n",
    "python_scores = df_python_predictionsMax.loc[indexes, 'mmsplice_dlogitPsi']\n",
    "pearsonr(plugin_scores, python_scores)[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count the shared variants whose plugin and Python scores are not exactly equal.\n",
    "score_delta = (\n",
    "    df_plugin_predictionsMax.loc[indexes, 'mmsplice_dlogitPsi']\n",
    "    - df_python_predictionsMax.loc[indexes, 'mmsplice_dlogitPsi'])\n",
    "(score_delta != 0).sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-variant score difference (plugin minus Python), keeping only the non-zero entries.\n",
    "diff_series = (\n",
    "    df_plugin_predictionsMax.loc[indexes, 'mmsplice_dlogitPsi']\n",
    "    - df_python_predictionsMax.loc[indexes, 'mmsplice_dlogitPsi']\n",
    ").loc[lambda delta: delta != 0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The variants counted below are handled by the plugin but not by the Python API. This is fine; the difference is due to extra exons."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "12"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of variant IDs present in the plugin results but absent from the Python results.\n",
    "len(set(df_plugin_predictionsMax.index).difference(set(df_python_predictionsMax.index)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[]"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Variant IDs present in the Python results but absent from the plugin results.\n",
    "list(set(df_python_predictionsMax.index).difference(set(df_plugin_predictionsMax.index)))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
